{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-03-17T01:27:43.572160800Z",
     "start_time": "2024-03-17T01:27:40.824935200Z"
    }
   },
   "outputs": [],
   "source": [
    "# imports\n",
    "import javalang\n",
    "import os\n",
    "from app.database import Database\n",
    "from app.classes import IterationStatus, BatchAttemptStatus, TestStatus, AttemptStatus, Dataset\n",
    "from typing import List\n",
    "from app.constants import CONFIG\n",
    "import pickle\n",
    "import xml.etree.ElementTree as ET\n",
    "from app.utils import etree_to_dict\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [],
   "source": [
    "# constants\n",
    "PROJECT_PATH = r\"D:\\IDEA_Projects\\TestJavaCode\"\n",
    "DATASET_NAME1 = r\"jfreechart\"\n",
    "DATASET_NAME2 = r\"jfreechart154\""
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T01:27:43.575355400Z",
     "start_time": "2024-03-17T01:27:43.570616200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [],
   "source": [
    "# read dataset\n",
    "with open(f\"{CONFIG.dataset_folder}/{DATASET_NAME1}.pkl\", \"rb\") as f:\n",
    "    raw_dataset = pickle.load(f)\n",
    "    dataset1 = Dataset(**raw_dataset)\n",
    "\n",
    "with open(f\"{CONFIG.dataset_folder}/{DATASET_NAME2}.pkl\", \"rb\") as f:\n",
    "    raw_test_dataset = pickle.load(f)\n",
    "    dataset2 = Dataset(**raw_test_dataset)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T01:27:43.781538700Z",
     "start_time": "2024-03-17T01:27:43.574828200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [],
   "source": [
    "def handle_files(string):\n",
    "    lines = string.splitlines()\n",
    "    lines = [x.strip() for x in lines]\n",
    "    lines = [x for x in lines if x]\n",
    "    return \"\".join(lines)\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T01:27:43.782535900Z",
     "start_time": "2024-03-17T01:27:43.774165100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "read time: 0.042999267578125\n",
      "decompress time: 3.7555201053619385\n",
      "loads time: 4.0817646980285645\n",
      "assembling time: 3.6815402507781982\n"
     ]
    }
   ],
   "source": [
    "from app.database import Database\n",
    "db = Database()\n",
    "all_dataset = db.get_all()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T01:30:21.034398500Z",
     "start_time": "2024-03-17T01:30:08.844506200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lang_1_fixed 47\n",
      "commons-csv 58\n",
      "commons-cli 61\n",
      "gson 66\n",
      "jfreechart 82\n",
      "lang_1_fixed 89\n",
      "gson 98\n",
      "commons-csv 99\n",
      "commons-cli 100\n",
      "jfreechart 101\n",
      "jfreechart154 123\n",
      "jfreechart154 126\n",
      "lang_1_fixed 127\n",
      "gson 128\n",
      "jfreechart154 130\n",
      "jfreechart154 131\n"
     ]
    }
   ],
   "source": [
    "for b in all_dataset:\n",
    "    print(b.dataset, b.doc_id)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T01:30:21.037648500Z",
     "start_time": "2024-03-17T01:30:21.034398500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [],
   "source": [
    "old_batch = [x for x in all_dataset if x.doc_id == 101][0]\n",
    "dataset1_idx_map = {}\n",
    "count = -1\n",
    "for class_info in dataset1:\n",
    "    for method_info in class_info:\n",
    "        count += 1\n",
    "        key = f\"{class_info.class_name}#{method_info.signature}#{count}\"\n",
    "        dataset1_idx_map[key] = handle_files(method_info.content)\n",
    "\n",
    "old_map = {}\n",
    "for attempt in old_batch.attempts:\n",
    "    key = f\"{attempt.name}#{attempt.idx}\"\n",
    "    if key in dataset1_idx_map:\n",
    "        old_map[attempt.name] = (dataset1_idx_map[key], attempt)\n",
    "    else:\n",
    "        assert False"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T01:27:55.620979900Z",
     "start_time": "2024-03-17T01:27:55.581014400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [],
   "source": [
    "new_batch = [x for x in all_dataset if x.doc_id == 123][0]\n",
    "dataset2_idx_map = {}\n",
    "count = -1\n",
    "for class_info in dataset2:\n",
    "    for method_info in class_info:\n",
    "        count += 1\n",
    "        key = f\"{class_info.class_name}#{method_info.signature}#{count}\"\n",
    "        dataset2_idx_map[key] = handle_files(method_info.content)\n",
    "\n",
    "new_map = {}\n",
    "for attempt in new_batch.attempts:\n",
    "    key = f\"{attempt.name}#{attempt.idx}\"\n",
    "    if key in dataset2_idx_map:\n",
    "        new_map[attempt.name] = (dataset2_idx_map[key], attempt)\n",
    "    else:\n",
    "        assert False"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T01:27:55.628581100Z",
     "start_time": "2024-03-17T01:27:55.601981200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Counter({'PASS/CE/SE/FAIL -> DIRECT': 3037, 'RE -> FAIL': 1413, 'DIFFERENT -> FAIL': 638})\n"
     ]
    }
   ],
   "source": [
    "from collections import Counter\n",
    "\n",
    "status = []\n",
    "generated_attempts = []\n",
    "save_money_attempt = []\n",
    "for attempt in new_batch.attempts:\n",
    "    if attempt.name in old_map:\n",
    "        old_content, old_attempt = old_map[attempt.name]\n",
    "        new_content, new_attempt = new_map[attempt.name]\n",
    "        if old_content != new_content:\n",
    "            new_attempt.sub_iteration_status = None\n",
    "            new_attempt.iteration_status = None\n",
    "            generated_attempts.append(new_attempt)\n",
    "            status.append(\"DIFFERENT -> FAIL\")\n",
    "        else:\n",
    "            old_attempt.idx = new_attempt.idx\n",
    "            if old_attempt.type in [IterationStatus.Type.PASS,IterationStatus.Type.COMPILE_ERROR, IterationStatus.Type.SYNTAX_ERROR, IterationStatus.Type.FAIL]:\n",
    "                status.append(\"PASS/CE/SE/FAIL -> DIRECT\")\n",
    "                generated_attempts.append(old_attempt)\n",
    "            elif old_attempt.type == IterationStatus.Type.RUNTIME_ERROR:\n",
    "                status.append(\"RE -> FAIL\")\n",
    "                save_money_attempt.append((new_attempt.idx, old_attempt.sub_iteration_status))\n",
    "                old_attempt.sub_iteration_status = None\n",
    "                old_attempt.iteration_status = None\n",
    "                generated_attempts.append(old_attempt)\n",
    "            else:\n",
    "                print(old_attempt.type)\n",
    "                assert False\n",
    "print(Counter(status))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-16T10:42:42.413713Z",
     "start_time": "2024-03-16T10:42:42.360454300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "add new attempts 684\n",
      "now attempts 978\n",
      "b3 remain 4794\n",
      "now attempts 5772\n"
     ]
    }
   ],
   "source": [
    "b1 = [x for x in all_dataset if x.doc_id == 123][0]\n",
    "b2 = [x for x in all_dataset if x.doc_id == 130][0]\n",
    "b3 = [x for x in all_dataset if x.doc_id == 131][0]\n",
    "# 找出b1.attempts中的idx在b2.attempts中不存在的\n",
    "b1_idx = [x.idx for x in b1.attempts]\n",
    "b2_idx = [x.idx for x in b2.attempts]\n",
    "diff_b1_b2 = [x for x in b1_idx if x not in b2_idx]\n",
    "# 将差别部分添加到新batch中\n",
    "new_attempts = []\n",
    "new_attempts.extend([x for x in b1.attempts if x.idx in diff_b1_b2])\n",
    "print(\"add new attempts\", len(new_attempts))\n",
    "# 比较b2和b3的差别\n",
    "get_compile_error = lambda x: [_ for _ in x if _.type == IterationStatus.Type.COMPILE_ERROR]\n",
    "b2_ce = get_compile_error(b2.attempts)\n",
    "b3_ce = get_compile_error(b3.attempts)\n",
    "b2_ce_idx = [x.idx for x in b2_ce]\n",
    "b3_ce_idx = [x.idx for x in b3_ce]\n",
    "diff_b3_b2 = [x for x in b3_ce_idx if x not in b2_ce_idx]\n",
    "# 将差别部分添加到新batch中\n",
    "def to_fail(attempt):\n",
    "    attempt.sub_iteration_status = None\n",
    "    attempt.iteration_status = None\n",
    "    return attempt\n",
    "new_attempts.extend([to_fail(x) for x in b3.attempts if x.idx in diff_b3_b2])\n",
    "print(\"now attempts\", len(new_attempts))\n",
    "# 将b3剩下的内容添加到新batch中\n",
    "b3_remain = [x for x in b3.attempts if x.idx not in diff_b3_b2]\n",
    "print(\"b3 remain\", len(b3_remain))\n",
    "new_attempts.extend(b3_remain)\n",
    "print(\"now attempts\", len(new_attempts))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T01:30:38.704046400Z",
     "start_time": "2024-03-17T01:30:38.499543300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "read time: 0.04450869560241699\n",
      "decompress time: 3.7254297733306885\n",
      "loads time: 4.178632497787476\n",
      "dumps time: 2.4713973999023438\n",
      "compress time: 23.007278442382812\n",
      "write time: 0.1072993278503418\n"
     ]
    }
   ],
   "source": [
    "# for i in range(684):\n",
    "#      new_attempts[i].sub_iteration_status = None\n",
    "#      new_attempts[i].iteration_status = None\n",
    "#\n",
    "# batch_attempt_status = BatchAttemptStatus(\n",
    "#     attempts=new_attempts,\n",
    "#     flow_run_request=b1.flow_run_request,\n",
    "#     start_time_ms=0,\n",
    "#     end_time_ms=0,\n",
    "#     dataset=\"jfreechart154\",\n",
    "#     mode=b1.mode,\n",
    "# )\n",
    "# db.insert(batch_attempt_status)\n",
    "db.delete([129])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T01:59:28.057762Z",
     "start_time": "2024-03-17T01:58:53.393683300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1413\n",
      "1413\n"
     ]
    }
   ],
   "source": [
    "print(len(save_money_attempt))\n",
    "save_money_attempt_2 = [(x,y[0]) for x,y in save_money_attempt if len(y) == 1]\n",
    "print(len(save_money_attempt_2))\n",
    "save_money_attempt_3 = [(x,y.llm_records) for x,y in save_money_attempt_2]\n",
    "save_money_attempt_4 = [(x,y[0].response) for x,y in save_money_attempt_3]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-16T10:47:29.467559800Z",
     "start_time": "2024-03-16T10:47:29.448560400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "outputs": [],
   "source": [
    "from tinydb import TinyDB\n",
    "db = TinyDB(\"jfreechart154_gpt4.json\")\n",
    "for i in save_money_attempt_4:\n",
    "    db.insert({\n",
    "        \"idx\": i[0],\n",
    "        \"response\": i[1]\n",
    "    })"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-16T10:51:32.327178200Z",
     "start_time": "2024-03-16T10:51:11.376854700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
